Student Depression Analysis¶

Set up¶

In [1]:
import plotly.express as px
import pandas as pd

# Load the survey data once; every chart in this notebook reads from `df`.
df = pd.read_csv("data/student_depression_dataset.csv")

# Shared chart styling: COL_OK colours the "not depressed" series and
# COL_RISK the "depressed" series; TEMPLATE is the Plotly layout template.
COL_OK, COL_RISK = "#1E88E5", "#D81B60"
TEMPLATE = "presentation"


def bump_fonts(fig, base=20):
    """Enlarge the fonts of the title, the axes and the legend.

    Args:
        fig: Figure object exposing ``update_layout``, ``update_xaxes`` and
            ``update_yaxes`` (the Plotly figures built in this notebook).
        base: Font size for body text, axis titles and the legend. The
            figure title uses ``base + 2`` and the tick labels ``base - 2``.

    Returns:
        The same ``fig`` object, so that calls can be chained.
    """
    tick_size = base - 2
    fig.update_layout(
        font={"size": base},
        title_font={"size": base + 2},
        legend_font={"size": base},
    )
    # Both axes get the same title and tick sizes; x is updated before y.
    for update_axis in (fig.update_xaxes, fig.update_yaxes):
        update_axis(title_font={"size": base}, tickfont={"size": tick_size})
    return fig

Graph A - Gender composition¶

Roughly three-fifths of respondents are male, two-fifths female¶

In [2]:
# Number of respondents per gender as a tidy two-column frame (Gender, count).
gender_counts = (
    df["Gender"]
    .value_counts()
    .rename_axis("Gender")
    .reset_index(name="count")
)

# Full pie (hole=0, i.e. not a donut), one slice per gender.
fig_gender_pie = px.pie(
    gender_counts,
    names="Gender",
    values="count",
    hole=0,
    title="<b>Gender composition</b>",
    template=TEMPLATE,
    color_discrete_sequence=[COL_OK, COL_RISK],
)

# Enlarge fonts, print each slice's share inside it, then render.
bump_fonts(fig_gender_pie).update_traces(
    textposition="inside",
    texttemplate="%{percent:.1%}",
).show()

Graph B - Depression across genders¶

Depression prevalence is similar across genders¶

In [3]:
# Respondent counts for every (Gender, Depression) combination.
gender_tab = (
    df.groupby(["Gender", "Depression"])
      .size()
      .reset_index(name="count")
)
gender_tab["DepressionLabel"] = gender_tab["Depression"].map({0: "No", 1: "Yes"})

# Share of each depression status within its gender, in percent.
# The group totals are broadcast back to the rows with transform("sum"),
# which avoids calling a Python lambda once per group.
gender_totals = gender_tab.groupby("Gender")["count"].transform("sum")
gender_tab["percent"] = (gender_tab["count"] / gender_totals * 100).round(1)

# One 100 %-stacked bar per gender, split by depression status.
fig_gender_dep = px.bar(
    gender_tab,
    x="Gender",
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"No": COL_OK, "Yes": COL_RISK},
    template=TEMPLATE,
    barmode="stack",
    text="percent",
    title="<b>Depression prevalence is similar across genders</b>",
    labels={"percent": "Percent (%)"},
)
fig_gender_dep.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_gender_dep).update_layout(legend_title_text="", yaxis_range=[0, 100])
# Render explicitly, like every other chart cell, instead of relying on the
# notebook displaying the value of the cell's last expression.
fig_gender_dep.show()

Graph C – Age distribution¶

Most of the sample is young: roughly three-quarters of respondents are between 18 and 30 years old, with a median age of 25.¶
In [4]:
# Keep respondents younger than 35. The frame is named df_35 and the
# depression-by-age chart uses the same `< 35` cut-off; the previous `< 30`
# filter contradicted both.
# NOTE(review): confirm 35 (not 30) is the intended cut-off for this chart.
df_35 = df[df["Age"] < 35].copy()

fig_age = px.histogram(
    df_35,
    x="Age",
    nbins=20,
    color_discrete_sequence=[COL_OK],
    template=TEMPLATE,
    title="<b>Age distribution</b>",
    labels={"Age": "Age (years)"},
).update_layout(
    bargap=0.05,
    # Set the y-axis title explicitly so it does not depend on how
    # px.histogram names its computed count axis.
    yaxis_title="Frequency",
)
bump_fonts(fig_age)

fig_age.show()

Graph D - Depression distribution by age¶

Depression levels decline progressively with aging¶

In [5]:
# Build the plotting frame without touching the shared `df`: relabel the
# Depression flag and add a string version of Age on a filtered copy only.
# The other cells map Depression from its 0/1 codes, so `df` must stay
# unmodified; this also removes the need to re-read the CSV afterwards.
df_35 = df.loc[df["Age"] < 35].assign(
    Depression=lambda d: d["Depression"].map({1: "Yes", 0: "No"}),
    # Ages as strings give a categorical x-axis with one bar per age.
    age_str=lambda d: d["Age"].astype(int).astype(str),
)

# Categorical axes are not sorted numerically, so pass the order explicitly.
age_order = sorted(df_35["Age"].astype(int).unique())
age_order_str = [str(a) for a in age_order]

# 100 %-stacked bars: barnorm="percent" rescales each age's counts to sum to 100.
fig_age_pct = px.histogram(
    df_35,
    x="age_str",
    color="Depression",
    category_orders={"age_str": age_order_str},
    barmode="stack",
    barnorm="percent",
    template=TEMPLATE,  # same template as every other chart in the notebook
    title="<b>Depression levels decline progressively with aging</b>",
    labels={"age_str": "Age"},
    color_discrete_map={"Yes": COL_RISK, "No": COL_OK},
)

# Show the percentage inside each segment, with a "%" suffix like the other charts.
fig_age_pct.update_traces(texttemplate="%{y:.1f}%", textposition="inside")
bump_fonts(fig_age_pct).update_layout(
    yaxis_range=[0, 100],
    legend_title_text="",
    xaxis_title="Age",
    yaxis_title="Percent (%)",
)

fig_age_pct.show()

Graph E - Depression prevalence by dietary habits¶

Less-healthy diets show markedly higher depression prevalence¶

In [6]:
# Drop the catch-all "Others" diet category (case-insensitive match).
df_diet = df.loc[df["Dietary Habits"].str.lower().ne("others")]

# Respondent counts for every (diet, depression) combination.
diet_dep = (
    df_diet
    .groupby(["Dietary Habits", "Depression"])
    .size()
    .reset_index(name="count")
)
diet_dep["DepLabel"] = diet_dep["Depression"].map(
    {0: "Not depressed", 1: "Depression"}
)

# Share of each depression status within its diet group, in percent.
diet_totals = diet_dep.groupby("Dietary Habits")["count"].transform("sum")
diet_dep["percent"] = (diet_dep["count"] / diet_totals * 100).round(1)

# Sort so that "Depression" rows come first (stacked at the bottom) and
# "Not depressed" rows follow (stacked on top).
dep_order = ["Depression", "Not depressed"]
diet_dep["DepLabel"] = diet_dep["DepLabel"].astype(
    pd.CategoricalDtype(categories=dep_order, ordered=True)
)
diet_dep = diet_dep.sort_values("DepLabel")

fig_diet_dep = px.bar(
    diet_dep,
    x="Dietary Habits",
    y="percent",
    color="DepLabel",
    text="percent",
    barmode="stack",
    template=TEMPLATE,
    color_discrete_map={"Not depressed": COL_OK, "Depression": COL_RISK},
    labels={"Dietary Habits": "Diet quality", "percent": "Percent (%)"},
    title="<b>Less-healthy diets show markedly higher depression prevalence</b>",
)
fig_diet_dep.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_diet_dep).update_layout(
    yaxis_range=[0, 100],
    xaxis_tickangle=0,
    legend_title_text="",
)
fig_diet_dep.show()

Graph F – Suicidal thoughts → depression¶

Students who report suicidal thoughts show about four times the prevalence of depression (≈ 79 % vs 21 %), highlighting a strong association between the two.¶

In [7]:
# GRAPH F – Suicidal thoughts vs depression (%)
# The survey column name (including the space before "?") is used four
# times below; keep it in one place so the spellings cannot drift apart.
SUICIDAL_COL = "Have you ever had suicidal thoughts ?"

# Respondent counts for every (suicidal thoughts, depression) combination.
suic = (
    df.groupby([SUICIDAL_COL, "Depression"])
    .size()
    .reset_index(name="count")
)
suic["DepressionLabel"] = suic["Depression"].map({0: "Not depressed", 1: "Depressed"})

# Share of each depression status within its answer group, in percent.
suic["percent"] = (
    suic["count"] / suic.groupby(SUICIDAL_COL)["count"].transform("sum") * 100
).round(1)

fig_suic = px.bar(
    suic,
    x=SUICIDAL_COL,
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"Not depressed": COL_OK, "Depressed": COL_RISK},
    template=TEMPLATE,
    text="percent",
    barmode="stack",
    title="<b>Suicidal thoughts → 4x depression</b>",
    labels={
        # Axis label previously misspelled as "Sucidal thoughts".
        SUICIDAL_COL: "Suicidal thoughts",
        "percent": "Percentage (%)",
    },
).update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_suic).update_layout(legend_title_text="", yaxis_range=[0, 100])
fig_suic.show()

Graph G – Financial stress → depression¶

Depression prevalence climbs steadily with financial stress, rising from roughly 32 % at stress level 1 to over 70 % at level 5.¶

In [8]:
# GRAPH G – Financial stress vs depression (%)
# Respondent counts for every (financial stress level, depression) combination.
fin = (
    df
    .groupby(["Financial Stress", "Depression"])
    .size()
    .reset_index(name="count")
)
fin["DepressionLabel"] = fin["Depression"].map({0: "No depression", 1: "Depressed"})

# Share of each depression status within its stress level, in percent.
fin["percent"] = (
    fin["count"] / fin.groupby("Financial Stress")["count"].transform("sum") * 100
).round(1)

fig_fin = px.bar(
    fin,
    x="Financial Stress",
    y="percent",
    color="DepressionLabel",
    # The order must use the exact labels produced above. The previous
    # "No Depressed" matched no label, so the requested order
    # (non-depressed first) was not applied.
    category_orders={"DepressionLabel": ["No depression", "Depressed"]},
    color_discrete_map={"Depressed": COL_RISK, "No depression": COL_OK},
    template=TEMPLATE,
    text="percent",
    barmode="stack",
    title="<b>Financial stress increases depression level</b>",
    labels={
        "Financial Stress": "Financial Stress (1-5)",
        "percent": "Percentage (%)",
    },
)
fig_fin.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_fin).update_layout(
    legend_title_text="",
    yaxis_range=[0, 100]
)
fig_fin.show()

Graph H – Sleep duration vs depression¶

A clear monotonic pattern: the less people sleep, the higher the prevalence of depression. Seven-to-eight-hour sleepers show the lowest rate (< 30 %), while the “< 5 hours” group exceeds 70 %.¶

In [9]:
# Raw "Sleep Duration" values carry literal single quotes; map them to clean
# display labels. Rows with any other value are dropped by the isin filter.
mapping = {
    "'Less than 5 hours'": "Less than 5 hours",
    "'5-6 hours'":        "5-6 hours",
    "'7-8 hours'":        "7-8 Hours",
    "'More than 8 hours'": "More than 8 hours"
}
df_filtered = df[df["Sleep Duration"].isin(mapping.keys())].copy()
df_filtered["SleepHoursCat"] = df_filtered["Sleep Duration"].map(mapping)

# Respondent counts for every (sleep category, depression) combination.
sleep = (
    df_filtered
    .groupby(["SleepHoursCat", "Depression"])
    .size()
    .reset_index(name="count")
)
sleep["DepressionLabel"] = sleep["Depression"].map({0: "No", 1: "Yes"})

# Share of each depression status within its sleep category, in percent.
sleep["percent"] = (
    sleep["count"] / sleep.groupby("SleepHoursCat")["count"].transform("sum") * 100
).round(1)

fig_sleep_dep = px.bar(
    sleep,
    x="SleepHoursCat",
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"No": COL_OK, "Yes": COL_RISK},
    template=TEMPLATE,
    barmode="stack",
    text="percent",
    title="<b>Less sleep → Higher depression prevalence</b>",
    labels={
        "SleepHoursCat": "Sleep duration",
        "percent": "Percent (%)"
    },
    category_orders={
        # Shortest to longest sleep, using the display labels from `mapping`.
        "SleepHoursCat": ["Less than 5 hours", "5-6 hours", "7-8 Hours", "More than 8 hours"],
        # Depressed series first, using the actual labels ("Yes"/"No").
        # The previous "Depressed"/"Not Depressed" matched nothing and was ignored.
        "DepressionLabel": ["Yes", "No"]
    }
)

fig_sleep_dep.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_sleep_dep).update_layout(
    legend_title_text="",
    yaxis_range=[0, 100]
)
fig_sleep_dep.show()

Graph I - CGPA vs academic pressure¶

Higher academic pressure slightly lowers median CGPA¶

In [10]:
# CGPA distribution per academic-pressure level; only pressure values in the
# inclusive range 1-5 are kept.
# NOTE(review): the chart title says pressure does not affect median CGPA,
# while the section heading says it slightly lowers it. Confirm which
# statement the data supports and align the two.
pressure_box = px.box(
    df[df["Academic Pressure"].between(1, 5)],
    x="Academic Pressure",
    y="CGPA",
    points="outliers",
    template=TEMPLATE,
    color_discrete_sequence=[COL_RISK],
    # "affect" (verb) replaces the misspelled "effect" in the title.
    title="<b>Higher academic pressure does not affect median CGPA</b>",
    labels={"Academic Pressure": "Pressure (1-5)", "CGPA": "CGPA"},
)
# boxmean=True also draws the mean inside each box.
pressure_box.update_traces(boxmean=True)
# Fixed axis ranges: half a unit of padding around levels 1-5 on x.
bump_fonts(pressure_box).update_layout(xaxis_range=[0.5, 5.5], yaxis_range=[3.9, 11.2])
pressure_box.show()

Graph J – Work/Study-hours vs academic pressure¶

Higher academic pressure is associated with longer study hours¶

In [11]:
# Daily work/study hours per academic-pressure level; only pressure values
# in the inclusive range 1-5 are plotted.
fig_hours_box = px.box(
    df[df["Academic Pressure"].between(1, 5)],
    x="Academic Pressure",
    y="Work/Study Hours",
    points="outliers",
    category_orders={"Academic Pressure": [1, 2, 3, 4, 5]},
    color_discrete_sequence=[COL_RISK],
    template=TEMPLATE,
    labels={"Academic Pressure": "Pressure (1-5)", "Work/Study Hours": "Hours per day"},
    title="<b>Higher academic pressure is associated with longer study hours</b>",
).update_traces(boxmean=True)  # also draw the mean inside each box

# Enlarge fonts, pad the x-axis half a unit around levels 1-5, then render.
bump_fonts(fig_hours_box).update_layout(xaxis_range=[0.5, 5.5])
fig_hours_box.show()